In [1]:
import sys
sys.path.append('../../')
In [2]:
%%capture
import pandas as pd
import cormerant as cmt
import scanpy as sc
import squidpy as sq
from matplotlib import pyplot as plt
import os
import numpy as np
import geopandas as gpd
import shapely
In [3]:
os.chdir('/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/')
os.listdir()
Out[3]:
['adata.h5ad',
 'adata.pkl',
 'all_cell_nbhd.parquet',
 'anndata.h5ad',
 'anndata.pickle',
 'cell_boundaries.parquet',
 'cell_by_gene.csv',
 'cell_metadata.csv',
 'images',
 'leiden_alpha.csv',
 'meta_cell_with_clusters.csv',
 'neuro_panel_cluster_annotation.csv',
 'partitioned_transcripts.csv',
 'partitioned_transcripts.parquet',
 'partitioned_transcripts_mill.csv',
 'partitioned_transcripts_tenmill.csv',
 'spat.Rds',
 'updated_cell_metadata.csv']
In [4]:
adata = sq.read.vizgen('./',
                       counts_file = 'cell_by_gene.csv',
                       meta_file='cell_metadata.csv',
                      )
md = pd.read_csv('./cell_metadata.csv',index_col=0)
In [5]:
adata.obs['EntityID'] = md.index
In [6]:
plt.hist(np.log10(adata.obs['transcript_count']))
plt.show()
In [7]:
adata.var_names_make_unique()
sc.pp.filter_cells(adata, min_counts = 50)
In [8]:
%%time
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution = .5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
CPU times: total: 2min 52s
Wall time: 2min 14s
In [9]:
sc.set_figure_params(figsize=(10,10))
sc.pl.umap(adata, color = ['leiden'], size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [10]:
sq.pl.spatial_scatter(
    adata,
    shape = None, color = 'leiden', size = 0.5,
    library_id = 'spatial', figsize = (10,10))
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  _cax = scatter(
In [11]:
markers = pd.read_csv('neuro_panel_cluster_annotation.csv', index_col = 0)
markers.index = markers.gene
markers.head()
Out[11]:
gene Class ClusterName Description Neurotransmitter Region
gene
Igf2 Igf2 Vascular VECA Vascular endothelial cells, arterial NaN CNS
Ngfr Ngfr Neurons SYNOR4 Noradrenergic erector muscle neurons Noradrenaline Sympathetic ganglion
Ccnd2 Ccnd2 Neurons SZNBL Neuronal intermidate progenitor cells NaN Striatum dorsal, Striatum ventral, Dentate gyrus
Th Th Neurons SYNOR4 Noradrenergic erector muscle neurons Noradrenaline Sympathetic ganglion
Lhx2 Lhx2 Neurons TEGLU13 Excitatory neurons, cerebral cortex Glutamate (VGLUT1,VGLUT2), Nitric oxide Cortex
In [12]:
adata.var = pd.merge(adata.var, markers, how = 'left',
                     left_index = True, 
                     right_index = True)
In [13]:
adata.var.Class.value_counts()
Out[13]:
Class
Neurons           106
Vascular           13
Ependymal           5
Oligos              5
Astrocytes          5
Immune              4
PeripheralGlia      3
Name: count, dtype: int64
In [14]:
markers.Class.value_counts()
Out[14]:
Class
Neurons           388
Vascular           30
Oligos             27
Ependymal          18
PeripheralGlia     18
Astrocytes         11
Immune              8
Name: count, dtype: int64
In [15]:
%%capture
sc.tl.rank_genes_groups(adata, 'leiden', method = 'wilcoxon')
In [16]:
%%capture
top_markers = pd.DataFrame()
for cluster in adata.obs['leiden'].unique():
    markers_ = adata.uns['rank_genes_groups']['names'][cluster][:10]
    scores = adata.uns['rank_genes_groups']['scores'][cluster][:10]
    top_markers = pd.concat([top_markers, pd.DataFrame({'cluster': [cluster]*10,
                                                        'gene': markers_,
                                                        'scores': scores})])
In [17]:
markers = markers.reset_index(drop = True)
top_markers = pd.merge(top_markers, markers, on = 'gene', how = 'left')
marker_table = pd.crosstab(top_markers.cluster, top_markers.Class)
In [18]:
labels = marker_table.idxmax(axis = 1)
labels.index = labels.index.astype(dtype = 'int')
labels = labels.sort_index()
labels.name = 'cluster_type'
adata.obs['cluster'] = adata.obs.leiden.astype('int')
adata.obs = pd.merge(adata.obs, labels, how = 'left', on = 'cluster')
adata.obs['cluster'] = adata.obs.leiden.astype('str') + '-' + adata.obs.cluster_type
In [19]:
sc.pl.umap(adata, color = 'cluster', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [20]:
sq.pl.spatial_scatter(
    adata,
    shape = None,
    color = 'cluster',
    size = .75,
    library_id = 'spatial',
    figsize = (10,10)
)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  _cax = scatter(
In [21]:
sc.pl.umap(adata, color = 'cluster_type', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [22]:
sq.pl.spatial_scatter(
    adata,
    shape = None,
    color = 'cluster_type',
    size = .5,
    library_id = 'spatial',
    figsize = (10,10)
)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  _cax = scatter(

Alpha Shape Stuff

In [23]:
%%time
sq.gr.spatial_neighbors(adata, coord_type = 'generic', spatial_key = 'spatial')
sq.gr.nhood_enrichment(adata, cluster_key = 'cluster')
  0%|          | 0/1000 [00:00<?, ?/s]
CPU times: total: 17.8 s
Wall time: 29.7 s
In [24]:
import pickle

with open('adata.pkl','wb') as file:
    pickle.dump(adata, file)
In [25]:
alpha = gpd.read_parquet('/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/all_cell_nbhd.parquet')

Subcellular¶

Lettuce look at a vascular cell.

In [26]:
cell_colors = dict(zip(adata.obs['cluster_type'].cat.categories.tolist(),
                       adata.uns['cluster_type_colors']))
In [27]:
adata.obs[['center_x','center_y']] = adata.obsm['spatial']
adata.obs.index = adata.obs.EntityID
In [28]:
adata.obs.to_csv('./updated_cell_metadata.csv')
In [29]:
import importlib
In [30]:
importlib.reload(cmt.viz)
Out[30]:
<module 'cormerant.viz' from 'C:\\Users\\DouglasHannumJr\\cormerant-internal\\cormerant\\viz\\__init__.py'>
In [31]:
%%time

cmt.viz.plot_3d_cellPortraits_locations(
    md = adata.obs,
    cell_idx = adata.obs.index[100:110],
    alpha_shape_df=alpha,
    cell_idx_groups=adata.obs.cluster_type[100:110],
    scale = 10
)
embed
Edit @cellvisulization/1-0-2-global-cell-portraits-alpha-shape on Observable
CPU times: total: 703 ms
Wall time: 721 ms
In [32]:
%%time
importlib.reload(cmt.viz)

cmt.viz.plot_3d(
    directory = './', 
    cell_idx = 121919181900000610,
    local=True,
    cell_categories='cluster_type',
    window_size=200,
    max_number_of_cells=10000,
    pl_batch_size = None,
    transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
    cell_boundaries_file= 'cell_boundaries.parquet',
    metadata_filename = 'updated_cell_metadata.csv',
    image_file = 'images/mosaic_DAPI_z0.tif',
    transcripts_file = 'partitioned_transcripts.csv',   
    cell_colors = cell_colors
)
loading metadata
loading cell boundaries
loading image
loading transcripts
processing data
embed
Edit @cellvisulization/2-3-3-deck-gl-image-polygon-scatter-3d on Observable
CPU times: total: 46.8 s
Wall time: 21.2 s
In [33]:
import random
random.seed(101)

cell_idx = random.sample(adata.obs.index.to_list(), 9)
cell_idx_groups = adata.obs.loc[cell_idx,'cluster_type']
In [34]:
cell_idx_groups
Out[34]:
EntityID
121919181900076205     Neurons
121919181900025544     Neurons
121919181900070708     Neurons
121919181900047035     Neurons
121919181900061255     Neurons
121919181900006364      Immune
121919181900066025     Neurons
121919181900028136    Vascular
121919181900029077      Oligos
Name: cluster_type, dtype: category
Categories (5, object): ['Astrocytes', 'Immune', 'Neurons', 'Oligos', 'Vascular']
In [35]:
adata.obs.loc[cell_idx,:]
Out[35]:
fov volume min_x min_y max_x max_y anisotropy transcript_count perimeter_area_ratio solidity EntityID n_counts leiden cluster cluster_type center_x center_y
EntityID
121919181900076205 NaN 672.675045 7326.800148 5471.750808 7336.865748 5483.220408 1.177693 532 0.500019 4.906303 121919181900076205 526.0 11 11-Neurons Neurons 7331.572549 5477.365879
121919181900025544 NaN 1325.321019 7869.038569 3476.540613 7882.776169 3495.138213 1.368497 926 0.354131 4.572747 121919181900025544 921.0 11 11-Neurons Neurons 7875.936714 3485.659940
121919181900070708 NaN 939.646173 4442.402818 2810.493718 4455.168419 2822.395319 1.563477 546 0.431894 5.730559 121919181900070708 543.0 2 2-Neurons Neurons 4448.540029 2816.330188
121919181900047035 NaN 1359.322747 3847.110200 2241.227858 3861.927800 2255.721458 1.159034 418 0.345056 5.816319 121919181900047035 416.0 5 5-Neurons Neurons 3854.774056 2248.176539
121919181900061255 NaN 507.194020 798.952706 4875.546737 808.154306 4886.584337 1.272157 316 0.630637 4.961753 121919181900061255 315.0 9 9-Neurons Neurons 803.610721 4881.899592
121919181900006364 NaN 530.426260 5394.095623 4250.803436 5406.537223 4264.649036 1.363851 150 0.776861 3.214264 121919181900006364 150.0 12 12-Immune Immune 5399.282633 4258.382778
121919181900066025 NaN 1090.619165 6756.297692 2712.602579 6767.983293 2725.476179 1.228211 579 0.382697 5.990143 121919181900066025 578.0 9 9-Neurons Neurons 6761.684680 2719.416899
121919181900028136 NaN 319.863988 7911.997957 1578.393001 7918.607557 1586.622602 1.268324 122 0.650445 4.958105 121919181900028136 122.0 4 4-Vascular Vascular 7915.275176 1582.522094
121919181900029077 NaN 912.352412 3380.432441 5957.910657 3392.982041 5974.456258 1.363795 300 0.524158 4.337677 121919181900029077 299.0 0 0-Oligos Oligos 3386.840161 5968.146810
In [36]:
%%time
importlib.reload(cmt.viz)


cmt.viz.plot_3d_cellPortraits(
    directory = './', 
    grid = (3,3),
    cell_idx = cell_idx,
    cell_idx_groups = cell_idx_groups,
    local=True,
    cell_categories='cluster_type',
    window_size=50,
    pl_batch_size = None,
    transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
    cell_boundaries_file= 'cell_boundaries.parquet',
    metadata_filename = 'updated_cell_metadata.csv',
    image_file = 'images/mosaic_DAPI_z0.tif',
    transcripts_file = 'partitioned_transcripts.csv',   
    cell_colors = cell_colors
)
loading metadata
loading cell boundaries
loading image
processing images
loading transcripts
processing data
embed
Edit @cellvisulization/1-0-0-cell-portrait-viewer on Observable
CPU times: total: 1min 12s
Wall time: 26.2 s
In [43]:
importlib.reload(cmt.viz)
Out[43]:
<module 'cormerant.viz' from 'C:\\Users\\DouglasHannumJr\\cormerant-internal\\cormerant\\viz\\__init__.py'>
In [44]:
%%time

cmt.viz.plot_3d_cellPortraits_alpha_minimap(
    directory = './', 
    grid = (3,3),
    cell_idx = cell_idx,
    cell_idx_groups = cell_idx_groups,
    local=True,
    cell_categories='cluster_type',
    window_size=50,
    pl_batch_size = None,
    minimap_box_scale=10,
    alpha_shape_df = alpha,
    transformation_file = 'images/micron_to_mosaic_pixel_transform.csv',
    cell_boundaries_file= 'cell_boundaries.parquet',
    metadata_filename = 'updated_cell_metadata.csv',
    image_file = 'images/mosaic_DAPI_z0.tif',
    transcripts_file = 'partitioned_transcripts.csv',   
    cell_colors = cell_colors
)
loading metadata
loading cell boundaries
loading image
processing images
loading transcripts
processing data
embed
Edit @cellvisulization/1-0-3-cell-portrait-viewer on Observable
CPU times: total: 57 s
Wall time: 27 s
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: